Tensorboard
In [1]:
import time
import os
import pandas as pd
# Identify this run: project and step labels plus a launch timestamp.
project_name = 'Dog_Breed_Identification'
step_name = 'Train'
time_str = time.strftime("%Y%m%d_%H%M%S", time.localtime())
run_name = '_'.join([project_name, step_name, time_str])
print('run_name: {}'.format(run_name))

# The 'model' folder is resolved relative to the current working directory.
cwd = os.getcwd()
model_path = os.path.join(cwd, 'model')
print('model_path: {}'.format(model_path))
In [2]:
import h5py
import numpy as np
from sklearn.utils import shuffle
# Seed NumPy's global RNG before any data handling.
np.random.seed(2017)

# Locations of the per-network feature files stored under ./model.
cwd = os.getcwd()
feature_dir = os.path.join(cwd, 'model')
feature_cgg16 = os.path.join(feature_dir, 'feature_VGG16_{}.h5'.format(171023))
feature_cgg19 = os.path.join(feature_dir, 'feature_VGG19_{}.h5'.format(171023))
feature_resnet50 = os.path.join(feature_dir, 'feature_ResNet50_{}.h5'.format(171023))
feature_xception = os.path.join(feature_dir, 'feature_Xception_{}.h5'.format(171023))
feature_inception = os.path.join(feature_dir, 'feature_InceptionV3_{}.h5'.format(171023))
feature_files = [
    feature_cgg16,
    feature_cgg19,
    feature_resnet50,
    feature_xception,
    feature_inception,
]

# Collect the train / val / test arrays of every file, in file order.
train_parts, val_parts, test_parts = [], [], []
for feature_file in feature_files:
    with h5py.File(feature_file, 'r') as store:
        train_parts.append(np.array(store['train']))
        val_parts.append(np.array(store['val']))
        test_parts.append(np.array(store['test']))
        # Labels are overwritten on every pass, so the values read from the
        # last file are the ones kept.
        # NOTE(review): presumably all files share the same labels - confirm.
        y_train = np.array(store['train_label'])
        y_val = np.array(store['val_label'])

# Join the per-file arrays along their last axis.
x_train = np.concatenate(train_parts, axis=-1)
x_val = np.concatenate(val_parts, axis=-1)
x_test = np.concatenate(test_parts, axis=-1)

print(x_train.shape)
print(x_train.shape[1:])
print(len(y_train))
print(x_val.shape)
print(len(y_val))
print(x_test.shape)
In [3]:
from sklearn.utils import shuffle
# Shuffle features and labels together so each row keeps its own label.
# No random_state is passed to shuffle here.
x_train, y_train = shuffle(x_train, y_train)
In [4]:
# from keras.utils.np_utils import to_categorical
# y_train = to_categorical(y_train)
# y_val = to_categorical(y_val)
# print(y_train.shape)
# print(y_val.shape)
In [5]:
from sklearn.linear_model import LogisticRegression
# Multinomial logistic regression on the concatenated features.
logreg = LogisticRegression(
    multi_class='multinomial',
    solver='lbfgs',
    random_state=2017
)
# Kept as the last expression so the fitted estimator is shown as cell output.
logreg.fit(x_train, y_train)
Out[5]:
In [6]:
# Class probabilities and hard label predictions for the validation split.
val_proba = logreg.predict_proba(x_val)
val_preds = logreg.predict(x_val)

# Print the shapes of the predictions next to the shape of the labels.
for shape in (val_proba.shape, val_preds.shape, val_proba[:, 1].shape, y_val.shape):
    print(shape)
In [7]:
from keras.utils.np_utils import to_categorical
# Convert the validation labels with to_categorical; the result is the
# label input later passed to log_loss.
log_loss_y_val = to_categorical(y_val)

# Show the first sample: predicted probabilities, raw label, converted label.
for first_row in (val_proba[0], y_val[0], log_loss_y_val[0]):
    print(first_row)
In [8]:
from sklearn.metrics import log_loss, accuracy_score
# Log-loss of the raw predicted probabilities against the one-hot labels.
print('Val log_loss: {}'.format(log_loss(log_loss_y_val, val_proba)))

# Log-loss after clipping the *predicted probabilities* into [0.005, 0.995],
# the same clipping applied to the test predictions before submission.
# (Previously the one-hot labels were clipped by mistake, so this second
# figure compared the labels with themselves.)
val_proba_limit = np.clip(val_proba, 0.005, 0.995)
print('Val log_loss: {}'.format(log_loss(log_loss_y_val, val_proba_limit)))

# Accuracy of the hard label predictions.
print('Val accuracy_score: {}'.format(accuracy_score(y_val, val_preds)))
In [9]:
# Used to load model directly and skip train
# import os
# from keras.models import load_model
# cwd = os.getcwd()
# model = load_model(os.path.join(cwd, 'model', 'Dog_Breed_Identification_Train_20171024_155154.h5'))
In [10]:
# Predict class probabilities for the test features with the fitted model.
y_pred = logreg.predict_proba(x_test)
# Print the shape of the prediction array as a sanity check.
print(y_pred.shape)
In [11]:
# Show the first ten prediction rows before and after clipping.
preview = slice(None, 10)
print(y_pred[preview])
# Bound every probability to [0.005, 0.995].
y_pred = np.clip(y_pred, a_min=0.005, a_max=0.995)
print(y_pred[preview])
In [12]:
# List the entries of the test image folder and preview the first ten.
# NOTE(review): os.listdir returns entries in arbitrary order - confirm
# nothing downstream relies on this ordering.
test_image_dir = os.path.join(cwd, 'input', 'data_test', 'test')
files = os.listdir(test_image_dir)
print(files[:10])
In [13]:
# Load the label table (input/labels.csv) from the working directory.
cwd = os.getcwd()
df = pd.read_csv(os.path.join(cwd, 'input', 'labels.csv'))
# Report how many rows were read (message typo "lables" corrected).
print('labels amount: %d' % len(df))
# Kept as the last expression so the first rows render as cell output.
df.head()
Out[13]:
In [14]:
# Number of rows in the label table and the distinct breed names.
n = len(df)
breed = set(df['breed'])
n_class = len(breed)

# Build the name <-> index mappings from a *sorted* list of breeds.
# Iterating the set directly gives an order that can change between
# interpreter runs (string hash randomisation), which made the numbering
# non-reproducible.
breed_sorted = sorted(breed)
class_to_num = {name: idx for idx, name in enumerate(breed_sorted)}
num_to_class = dict(enumerate(breed_sorted))
print(breed)
In [15]:
# Load the sample submission. The path is built with os.path.join so it
# works on any OS, instead of a hard-coded Windows '.\\input\\...' path.
df2 = pd.read_csv(os.path.join(cwd, 'input', 'sample_submission.csv'))
n_test = len(df2)
print(df2.shape)
In [16]:
# Copy the predicted probabilities into the submission frame. The first
# column of df2 is left untouched; every remaining column receives one
# column of y_pred, in order.
# NOTE(review): assumes y_pred rows/columns are ordered like df2's rows and
# class columns - confirm against the feature-extraction step.
expected_shape = (len(df2), df2.shape[1] - 1)
if y_pred.shape != expected_shape:
    # Fail loudly instead of silently truncating or hitting an opaque
    # IndexError, as the former hard-coded range(0, 120) loop could.
    raise ValueError(
        'y_pred has shape {} but the submission template expects {}'.format(
            y_pred.shape, expected_shape))
df2.iloc[:, 1:] = y_pred

# Write the submission with an OS-independent path, creating the output
# folder first if it does not exist yet.
output_dir = os.path.join(cwd, 'output')
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
df2.to_csv(os.path.join(output_dir, 'pred.csv'), index=False)
In [ ]:
In [17]:
# Completion marker: reaching this cell means every cell above ran.
print('Done !')
In [ ]: